Skip to content

Conversation

@kper
Copy link
Contributor

@kper kper commented Nov 18, 2025

Created a pattern to recognize llvm.ucmp and llvm.scmp.
Alive Proof: https://alive2.llvm.org/ce/z/BYRyu-

Closes #166579

@kper kper requested a review from nikic as a code owner November 18, 2025 09:16
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Nov 18, 2025
@llvmbot
Copy link
Member

llvmbot commented Nov 18, 2025

@llvm/pr-subscribers-llvm-transforms

Author: None (kper)

Changes

Created a pattern to recognize llvm.ucmp and llvm.scmp.
Alive Proof: https://alive2.llvm.org/ce/z/BYRyu-

Closes #166579


Full diff: https://github.com/llvm/llvm-project/pull/168505.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (+45)
  • (modified) llvm/test/Transforms/InstCombine/select-cmp.ll (+114)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 9572f9d702e1b..5c8008700e181 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1955,6 +1955,48 @@ static Instruction *foldSelectICmpEq(SelectInst &SI, ICmpInst *ICI,
   return nullptr;
 }
 
+/// Fold a three-way comparison written with selects into llvm.ucmp/llvm.scmp:
+///
+///   select(icmp(eq, X, Y), Z, select(icmp(ult|slt, X, Y), -1, 1))
+///     --> select(icmp(eq, X', Y'), Z, llvm.[us]cmp(X', Y'))
+///
+/// where X' = freeze(X) and Y' = freeze(Y), so the equality test and the
+/// intrinsic observe the same values even if X or Y is undef. If Z is zero the
+/// outer select is absorbed and the result is just llvm.[us]cmp(X, Y).
+/// Returns the replacement for \p SI, or nullptr if the pattern does not match.
+static Value *foldSelectToInstrincCmp(SelectInst &SI, const ICmpInst *ICI,
+                                      Value *TrueVal, Value *FalseVal,
+                                      InstCombiner::BuilderTy &Builder) {
+  if (ICI->getPredicate() != ICmpInst::ICMP_EQ)
+    return nullptr;
+
+  Value *X = ICI->getOperand(0), *Y = ICI->getOperand(1);
+  Type *CmpTy = X->getType(), *ResTy = SI.getType();
+  // The intrinsics take integers only (no pointers), need a result of at least
+  // two bits, and require operands and result to be both scalar or both vector.
+  if (!CmpTy->isIntOrIntVectorTy() || ResTy->getScalarSizeInBits() < 2 ||
+      CmpTy->isVectorTy() != ResTy->isVectorTy())
+    return nullptr;
+
+  CmpPredicate IPred;
+  if (!match(FalseVal, m_Select(m_ICmp(IPred, m_Specific(X), m_Specific(Y)),
+                                m_AllOnes(), m_One())) ||
+      !(IPred == ICmpInst::ICMP_ULT || IPred == ICmpInst::ICMP_SLT))
+    return nullptr;
+
+  Builder.SetInsertPoint(&SI);
+  auto IID = IPred == ICmpInst::ICMP_ULT ? Intrinsic::ucmp : Intrinsic::scmp;
+  // With Z == 0 the equal case already matches the intrinsic's result.
+  if (match(TrueVal, m_Zero()))
+    return Builder.CreateIntrinsic(ResTy, IID, {X, Y});
+
+  Value *FrX = Builder.CreateFreeze(X, X->getName() + ".frz");
+  Value *FrY = Builder.CreateFreeze(Y, Y->getName() + ".frz");
+  Value *Eq = Builder.CreateICmpEQ(FrX, FrY);
+  Value *Cmp = Builder.CreateIntrinsic(ResTy, IID, {FrX, FrY});
+  return Builder.CreateSelect(Eq, TrueVal, Cmp, "select.ucmp");
+}
+
 /// Fold `X Pred C1 ? X BOp C2 : C1 BOp C2` to `min/max(X, C1) BOp C2`.
 /// This allows for better canonicalization.
 Value *InstCombinerImpl::foldSelectWithConstOpToBinOp(ICmpInst *Cmp,
@@ -2186,6 +2228,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
   if (Value *V = foldSelectWithConstOpToBinOp(ICI, TrueVal, FalseVal))
     return replaceInstUsesWith(SI, V);
 
+  if (Value *V = foldSelectToInstrincCmp(SI, ICI, TrueVal, FalseVal, Builder))
+    return replaceInstUsesWith(SI, V);
+
   return Changed ? &SI : nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/select-cmp.ll b/llvm/test/Transforms/InstCombine/select-cmp.ll
index b1bd7a0ecc8ac..bf1a6cb047c37 100644
--- a/llvm/test/Transforms/InstCombine/select-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/select-cmp.ll
@@ -808,5 +808,119 @@ define i1 @icmp_lt_slt(i1 %c, i32 %arg) {
   ret i1 %select
 }
 
+define i16 @icmp_fold_to_llvm_ucmp_when_eq(i16 %x, i16 %y) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_when_eq(
+; CHECK-NEXT:    [[Y_FRZ:%.*]] = freeze i16 [[Y:%.*]]
+; CHECK-NEXT:    [[X_FRZ:%.*]] = freeze i16 [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i16 [[X_FRZ]], [[Y_FRZ]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.ucmp.i16.i16(i16 [[X_FRZ]], i16 [[Y_FRZ]])
+; CHECK-NEXT:    [[SELECT_UCMP:%.*]] = select i1 [[TMP1]], i16 42, i16 [[TMP2]]
+; CHECK-NEXT:    ret i16 [[SELECT_UCMP]]
+;
+  %eq = icmp eq i16 %x, %y
+  %lt = icmp ult i16 %x, %y
+  %sign = select i1 %lt, i16 -1, i16 1 ; -1 if x <u y, otherwise 1
+  %res = select i1 %eq, i16 42, i16 %sign ; constant 42 on equality
+  ret i16 %res
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_when_ult_and_Z_zero(i16 %x, i16 %y) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_when_ult_and_Z_zero(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.ucmp.i16.i16(i16 [[X:%.*]], i16 [[Y:%.*]])
+; CHECK-NEXT:    ret i16 [[TMP1]]
+;
+  %eq = icmp eq i16 %x, %y
+  %lt = icmp ult i16 %x, %y
+  %sign = select i1 %lt, i16 -1, i16 1 ; -1 if x <u y, otherwise 1
+  %res = select i1 %eq, i16 0, i16 %sign ; 0 on equality
+  ret i16 %res
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_when_slt_and_Z_zero(i16 %x, i16 %y) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_when_slt_and_Z_zero(
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.scmp.i16.i16(i16 [[X:%.*]], i16 [[Y:%.*]])
+; CHECK-NEXT:    ret i16 [[TMP1]]
+;
+  %eq = icmp eq i16 %x, %y
+  %lt = icmp slt i16 %x, %y
+  %sign = select i1 %lt, i16 -1, i16 1 ; -1 if x <s y, otherwise 1
+  %res = select i1 %eq, i16 0, i16 %sign ; 0 on equality
+  ret i16 %res
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_when_cmp_slt(i16 %x, i16 %y) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_when_cmp_slt(
+; CHECK-NEXT:    [[Y_FRZ:%.*]] = freeze i16 [[Y:%.*]]
+; CHECK-NEXT:    [[X_FRZ:%.*]] = freeze i16 [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i16 [[X_FRZ]], [[Y_FRZ]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.scmp.i16.i16(i16 [[X_FRZ]], i16 [[Y_FRZ]])
+; CHECK-NEXT:    [[SELECT_UCMP:%.*]] = select i1 [[TMP1]], i16 42, i16 [[TMP2]]
+; CHECK-NEXT:    ret i16 [[SELECT_UCMP]]
+;
+  %eq = icmp eq i16 %x, %y
+  %lt = icmp slt i16 %x, %y ; signed variant: "slt" instead of "ult"
+  %sign = select i1 %lt, i16 -1, i16 1
+  %res = select i1 %eq, i16 42, i16 %sign
+  ret i16 %res
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_when_value(i16 %x, i16 %y, i16 %Z) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_when_value(
+; CHECK-NEXT:    [[Y_FRZ:%.*]] = freeze i16 [[Y:%.*]]
+; CHECK-NEXT:    [[X_FRZ:%.*]] = freeze i16 [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i16 [[X_FRZ]], [[Y_FRZ]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.ucmp.i16.i16(i16 [[X_FRZ]], i16 [[Y_FRZ]])
+; CHECK-NEXT:    [[SELECT_UCMP:%.*]] = select i1 [[TMP1]], i16 [[Z:%.*]], i16 [[TMP2]]
+; CHECK-NEXT:    ret i16 [[SELECT_UCMP]]
+;
+  %eq = icmp eq i16 %x, %y
+  %lt = icmp ult i16 %x, %y
+  %sign = select i1 %lt, i16 -1, i16 1
+  %res = select i1 %eq, i16 %Z, i16 %sign ; non-constant value on equality
+  ret i16 %res
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_when_ne(i16 %x, i16 %y) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_when_ne(
+; CHECK-NEXT:    [[Y_FRZ:%.*]] = freeze i16 [[Y:%.*]]
+; CHECK-NEXT:    [[X_FRZ:%.*]] = freeze i16 [[X:%.*]]
+; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i16 [[X_FRZ]], [[Y_FRZ]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.ucmp.i16.i16(i16 [[X_FRZ]], i16 [[Y_FRZ]])
+; CHECK-NEXT:    [[SELECT_UCMP:%.*]] = select i1 [[DOTNOT]], i16 42, i16 [[TMP1]]
+; CHECK-NEXT:    ret i16 [[SELECT_UCMP]]
+;
+  %ne = icmp ne i16 %x, %y
+  %lt = icmp ult i16 %x, %y
+  %sign = select i1 %lt, i16 -1, i16 1
+  %res = select i1 %ne, i16 %sign, i16 42 ; "ne" form with swapped select arms
+  ret i16 %res
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_negative_test_invalid_constant_1(i16 %x, i16 %y, i16 %Z) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_negative_test_invalid_constant_1(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[Z:%.*]], i16 1
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
+  %eq = icmp eq i16 %x, %y
+  %lt = icmp ult i16 %x, %y
+  %sign = select i1 %lt, i16 1, i16 1 ; invalid: true arm is 1 instead of -1
+  %res = select i1 %eq, i16 %Z, i16 %sign
+  ret i16 %res
+}
+
+define i16 @icmp_fold_to_llvm_ucmp_negative_test_invalid_constant_2(i16 %x, i16 %y, i16 %Z) {
+; CHECK-LABEL: @icmp_fold_to_llvm_ucmp_negative_test_invalid_constant_2(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i16 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i16 [[Z:%.*]], i16 -1
+; CHECK-NEXT:    ret i16 [[TMP2]]
+;
+  %eq = icmp eq i16 %x, %y
+  %lt = icmp ult i16 %x, %y
+  %sign = select i1 %lt, i16 -1, i16 -1 ; invalid: false arm is -1 instead of 1
+  %res = select i1 %eq, i16 %Z, i16 %sign
+  ret i16 %res
+}
+
 declare void @use(i1)
 declare void @use.i8(i8)

@kper
Copy link
Contributor Author

kper commented Nov 18, 2025

@dtcxzyw could you run the benchmarks to see whether this transformation is profitable?

@github-actions
Copy link

github-actions bot commented Nov 18, 2025

🐧 Linux x64 Test Results

  • 186436 tests passed
  • 4864 tests skipped

@kper
Copy link
Contributor Author

kper commented Nov 20, 2025

@dtcxzyw thanks, I fixed two bugs. First, the return type of the intrinsic wasn't correct. Second, the fold must not be applied to pointer comparisons.
Would you please rerun the benchmark? Thank you!

@kper
Copy link
Contributor Author

kper commented Nov 21, 2025

The last commit added another transformation to handle cmp4. While it now compiles exactly as described in the issue, it unfortunately did not have any effect on the benchmarks.
That's why I would like to know your opinion on whether this PR is even worth merging. @nikic @dtcxzyw

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the first pattern, s/ucmp is less profitable than select + icmp because the former one needs two icmp + two selects/one sub (See also TargetLowering::expandCMP). It doesn't simplify the IR since X and Y still have two uses after the transformation.

For the second pattern select(icmp(eq, X, Y), 0, llvm.cmp(X, Y)) -> llvm.cmp(X, Y), absorbing the equality test into s/ucmp is interesting. Unfortunately, we haven't seen this pattern occur in real-world programs.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[InstCombine] llvm.ucmp recognition/optimization issues

3 participants